#delimit;
clear;
set more off;
capture log close;

/*****
CHANGE PATH
The folder locals are defined before any file is touched, so the conversion
step below follows the same paths as the rest of the script.
  in      - folder holding the raw loan extract
  pathtab - folder holding the lookup files and receiving every saved file
*****/
local in "/data";
local pathtab "/data";

/* Delete a stale converted file if there is one (capture ignores the error when
the file is absent), then convert the SAS extract to Stata format with the
external st command (presumably Stat/Transfer - confirm).
The shell line is explicitly terminated. Without the delimiter it would keep
absorbing the following lines of the do-file up to the next terminator. */
capture erase "`in'/doed_loan_raw.dta";
!st `in'/doed_loan_raw.sas7bdat `in'/doed_loan_raw.dta;

/****************
Load the converted loan file and remove the records that must not enter the
analysis. Loans linked under matching criterion 5 are among those removed.
*****************/
use "`in'/doed_loan_raw.dta", clear;

/* Display every date field as a daily date */
format dt per_beg_dt per_end_dt loan_stat_dt curr_mat_dt curr_dis_dt curr_can_dt out_prin_bal_dt %td;

/* Enrollment periods starting before 30jun1988 are discarded: at that date the
oldest person in the sample was 15 years old, too young to be in college.
54 records carry an enrollment start in 1900. Count note left by the author: 163 */
drop if per_beg_dt<mdy(6,30,1988);

/* Loans matched under criterion 5 */
drop if match_criteria==5;

/* Loans that were cancelled (nothing disbursed) */
drop if tot_dis==0;

/* The person identifier is called pid from here on */
rename tuedt2003 pid;

/* Numeric version of the two-character loan_type code. The position of a code
in the list below is its numeric value (D1 is 1, RF is 24). Codes that are not
in the list stay missing. */
gen loan_type_num=.;
local code_no=0;
foreach code in D1 D2 D3 D4 D5 D6 D7 EU FI IC GB NU PE PK PL SE SF SL SU SG CL PU DU RF {;
	local ++code_no;
	replace loan_type_num=`code_no' if loan_type=="`code'";
};

label var loan_type_num "Type of loan (numeric var of loan_type)";

/* NOTE(review): the texts of labels 9 and 23 contain typos. They are reproduced
unchanged here because saved output may already carry them. */
label define loan_type_lab
	1 "1=Direct Staf Sub"
	2 "2=Direct Staf Unsub"
	3 "3=Direct Plus Grad"
	4 "4=Direct plus"
	5 "5=Direct cons Unsub"
	6 "6=Direct cons sub"
	7 "7=Direct plus cons"
	8 "8=Perk Expanded Lending"
	9 "9=Federalluy Insured (FISL)"
	10 "10=ICL"
	11 "11=FFEL PLUS Grad"
	12 "12=NDSL"
	13 "13=Fed Pell Grant"
	14 "14=Fed Perkins Loan"
	15 "15=FFEL PLUS Loan"
	16 "16=FSEOG"
	17 "17=FFEL Staf Sub"
	18 "18=Supplemental Loan (SLS)"
	19 "19=FFEL Staf Unsub"
	20 "20=Nat'l Sci. & Math Access to retain talent grant"
	21 "21=FFEL Cons Loan"
	22 "22=Perk Loan"
	23 "23=SNat'l Defense Loan"
	24 "24=FFEL Refi Loan";
label values loan_type_num loan_type_lab;

/* Indicator for consolidation loans: numeric loan types 5, 6 and 21.
NOTE(review): type 7 is labelled as a PLUS consolidation loan but is not part of
this flag - confirm that leaving it in the sample is intended. */
gen cons_loan=inlist(loan_type_num,5,6,21);

/* Indicator for parent PLUS loans (types 4 and 15), as opposed to the PLUS and
PLUS Grad types */
gen plus_par_loan=inlist(loan_type_num,4,15);

/* Both groups are removed from the sample. For a consolidation loan per_beg_dt
and per_end_dt have no meaning and almost always take the value 01jan2001, so
those records cannot be used to date enrollment. The two indicator variables
stay in the data. */
drop if cons_loan==1 | plus_par_loan==1;

/* Rewrite opeid as characters 1-6, a hyphen, then characters 7-8, presumably the
layout used by the lookup files merged later (confirm). Records whose first six
characters are empty get an empty opeid. opeid_1 keeps the six-character part. */
gen opeid_1=substr(opeid,1,6);
tempvar opeid_tail;
gen `opeid_tail'=substr(opeid,7,2);
drop opeid;
gen opeid=opeid_1+"-"+`opeid_tail' if opeid_1~="";
drop `opeid_tail';

/* First enrollment per person: order the loans of each pid by period start date
and take the school and start date of the earliest record. The stable option
keeps tied records in their existing order, so the chosen school does not
change from one run to the next. Missing start dates sort last and are
therefore never picked while a dated record exists. */
sort pid per_beg_dt, stable;

by pid: gen first_col_opeid_nslds=opeid if _n==1;
by pid: gen first_col_enroll_nslds=per_beg_dt if _n==1;
format first_col_enroll_nslds %td;

/* Last enrollment per person, built on a copy of the data and written to a
temporary file with one row per pid. Missing values sort after every real date,
so ordering on per_end_dt alone would select a record with a missing end date
as the last one. has_end_dt places records with a known end date at the bottom
of each pid group. A missing date is only selected when the person has no dated
record at all. */
preserve;
gen byte has_end_dt=!missing(per_end_dt);
sort pid has_end_dt per_end_dt, stable;
by pid: keep if _n==_N;
gen last_col_opeid_nslds=opeid;
gen last_col_enroll_nslds=per_end_dt;
format last_col_enroll_nslds %td;
keep pid last_col*;
save "`pathtab'/temp_nslds_tomerge_last_col.dta", replace;
restore;

/* Reduce the loan-level data to the rows that carry a first enrollment date */
keep pid first_col*;
drop if first_col_enroll_nslds==.;

/* Enrollment year: a start in September or later counts for its calendar year,
a start in January through August counts for the previous calendar year */
gen year_enroll=cond(month(first_col_enroll_nslds)>=9, year(first_col_enroll_nslds), year(first_col_enroll_nslds)-1);

sort pid;

/* Attach the last-enrollment school and date saved earlier, one row per pid */
merge 1:1 pid using "`pathtab'/temp_nslds_tomerge_last_col.dta", nogenerate;
dis _N; /*13,618*/
save "`pathtab'/temp_nslds_tomerge_first_col.dta", replace;

/* Stage 1 of the school lookup, run on the person-level data still in memory:
keep the persons whose first school matches the IPEDS state file on the full
opeid and the enrollment year. */
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

/* Enrollment years before 1995 are looked up as 1995 (presumably the first year
available in the lookup file - confirm) */
replace year_enroll=1995 if year_enroll<1995;

sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta", keep(match) nogenerate;
drop idx unitid first_col_opeid6_nslds;

/* Matched persons start the accumulating result file */
save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;

/* Stage 2 of the school lookup: persons NOT matched on the full opeid and year
are matched on the six-character opeid and year instead. */
use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);

/* Enrollment years before 1995 are looked up as 1995 */
replace year_enroll=1995 if year_enroll<1995;
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta";
keep if _merge==1;
drop _merge;

/* The unmatched rows carry empty copies of the lookup variables. They have to be
removed before the next merge because merge never overwrites a variable that
already exists in the master data. first_col_instnm_nslds is now dropped here
as well, as the later stages already do. Otherwise its empty copy would block
any institution name supplied by the six-character file (whether that file
carries the name is not visible from this script). */
drop first_col_instnm_nslds first_col_state first_col_state_fips first_col_sector;

merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta";
keep if _merge==3;
drop _merge;

/* Add the persons already matched in the previous stage */
append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";

drop idx unitid first_col_opeid6_nslds;
sort pid;

save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;

/* Stage 3 of the school lookup: persons matched by neither year-specific file
are matched on the full opeid alone, ignoring the year. */
use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);
replace year_enroll=1995 if year_enroll<1995;

/* Keep only the rows the full-opeid and year lookup cannot resolve, then clear
the empty lookup variables that merge leaves behind */
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta", keep(master) nogenerate;
drop first_col_instnm_nslds first_col_state first_col_state_fips first_col_sector;

/* Same for the six-character opeid and year lookup */
merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta", keep(master) nogenerate;
drop idx unitid first_col_opeid6_nslds first_col_state first_col_state_fips first_col_sector;

/* What is left is matched without the year. Only successful matches are kept. */
merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta", keep(match) nogenerate;

/* Diagnostic only: number of rows that repeat the pid of the row before */
sort pid;
count if pid==pid[_n-1];

/* Add the persons matched in the earlier stages and store the running result */
append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";

save "`pathtab'/ipeds_state_merge_opeid8_temp.dta", replace;


/* Stage 4 of the school lookup: persons that none of the three IPEDS lookups
can resolve take their state from the opeid_NSC file where it has one. These
persons stay in the data even when that file has no entry for their school. */
use "`pathtab'/temp_nslds_tomerge_first_col.dta", clear;
gen first_col_opeid6_nslds=substr(first_col_opeid_nslds,1,6);
replace year_enroll=1995 if year_enroll<1995;

/* Not matched on the full opeid and year */
sort first_col_opeid_nslds year_enroll;
merge m:1 first_col_opeid_nslds year_enroll using "`pathtab'/ipeds_state.dta", keep(master) nogenerate;
drop first_col_instnm_nslds first_col_state first_col_state_fips first_col_sector;

/* Not matched on the six-character opeid and year */
merge m:1 first_col_opeid6_nslds year_enroll using "`pathtab'/ipeds_state_opeid6.dta", keep(master) nogenerate;
drop idx unitid first_col_opeid6_nslds first_col_state first_col_state_fips first_col_sector;

/* Not matched on the full opeid without the year */
merge m:1 first_col_opeid_nslds using "`pathtab'/ipeds_state_noyear.dta", keep(master) nogenerate;
drop first_col_state first_col_state_fips first_col_sector;

/* The opeid_NSC file is keyed on a variable called opeid, so the key is renamed
for this merge and renamed back afterwards. Rows that exist only in the lookup
file are discarded. */
rename first_col_opeid_nslds opeid;
merge m:1 opeid using "`pathtab'/opeid_NSC.dta", keep(master match) nogenerate;

/* Diagnostic only: number of rows that repeat the pid of the row before */
sort pid;
count if pid==pid[_n-1];

rename opeid first_col_opeid_nslds;
rename College_State first_col_state;

/* Add every person matched in the three earlier stages */
append using "`pathtab'/ipeds_state_merge_opeid8_temp.dta";
/* year_enroll served as a merge key in the lookups and is not part of the output */
drop year_enroll;

/* Give the school attributes the _nslds suffix */
foreach attr in first_col_state first_col_state_fips first_col_sector {;
	rename `attr' `attr'_nslds;
};

/* Write the person-level result ordered by pid, then remove the working file
that accumulated the stage results */
sort pid;
save "`pathtab'/first_col_nslds_state.dta", replace;

erase "`pathtab'/ipeds_state_merge_opeid8_temp.dta";
